In [1]:
import pandas as pd
import plotly.graph_objects as go
In [2]:
import pandas as pd

# Location of the active-sellers CSV export.
# NOTE(review): this is an absolute, machine-specific path — adjust before running elsewhere.
csv_file_path = r"C:\Users\User\Desktop\Data Analytics\activesellers.csv"

# Parse the file into the DataFrame that the following cells refer to as `df`.
df = pd.read_csv(filepath_or_buffer=csv_file_path)
In [3]:
# Quick look at the data: first rows, column dtypes / non-null counts, summary statistics.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
df.info()
print(df.describe())
                 merchantid  listedproducts  totalunitssold  \
0  5357bcf2bb72c5504882e889               2          120000   
1  5708773c3c02161b3f8c7900               5          107100   
2  5417aada4ad3ab27e954b76c               2          100007   
3  570f3a713a698c14278bb51e               1          100000   
4  53082ea15aefb07dfe1f2a4f               1          100000   

   meanunitssoldperproduct  rating  merchantratingscount  meanproductprices  \
0                  60000.0   4.219              320031.0               9.00   
1                  21420.0   3.934              139223.0               7.76   
2                  50004.0   4.053              108048.0               8.00   
3                 100000.0   3.889               19248.0               5.67   
4                 100000.0   4.036              366898.0               5.00   

   meanretailprices  averagediscount  meandiscount  meanproductratingscount  \
0              20.0             54.0          54.0                   8836.0   
1              34.2             61.0          61.0                   4010.0   
2               8.0             -1.0          -1.0                   5531.0   
3              19.0             71.0          71.0                  18393.0   
4              33.0             85.0          85.0                  13789.0   

   totalurgencycount  urgencytextrate  
0                1.0             50.0  
1                3.0             60.0  
2                NaN              NaN  
3                1.0            100.0  
4                NaN              NaN  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 958 entries, 0 to 957
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   merchantid               958 non-null    object 
 1   listedproducts           958 non-null    int64  
 2   totalunitssold           958 non-null    int64  
 3   meanunitssoldperproduct  958 non-null    float64
 4   rating                   958 non-null    float64
 5   merchantratingscount     958 non-null    float64
 6   meanproductprices        958 non-null    float64
 7   meanretailprices         958 non-null    float64
 8   averagediscount          958 non-null    float64
 9   meandiscount             958 non-null    float64
 10  meanproductratingscount  958 non-null    float64
 11  totalurgencycount        391 non-null    float64
 12  urgencytextrate          391 non-null    float64
dtypes: float64(10), int64(2), object(1)
memory usage: 97.4+ KB
None
       listedproducts  totalunitssold  meanunitssoldperproduct      rating  \
count      958.000000      958.000000               958.000000  958.000000   
mean         1.641962     7124.483299              4407.605428    4.043994   
std          1.246183    14363.521893              9167.077812    0.222137   
min          1.000000        1.000000                 1.000000    2.333000   
25%          1.000000      100.000000               100.000000    3.931000   
50%          1.000000     1000.000000              1000.000000    4.055500   
75%          2.000000    10000.000000              5000.000000    4.190000   
max         15.000000   120000.000000            100000.000000    5.000000   

       merchantratingscount  meanproductprices  meanretailprices  \
count          9.580000e+02         958.000000        958.000000   
mean           2.202045e+04           8.634906         24.802265   
std            8.473232e+04           3.987599         30.258719   
min            0.000000e+00           1.000000          1.000000   
25%            1.373000e+03           6.000000          7.000000   
50%            5.990500e+03           8.000000         11.000000   
75%            1.723850e+04          11.000000         28.875000   
max            2.174765e+06          49.000000        252.000000   

       averagediscount  meandiscount  meanproductratingscount  \
count       958.000000    958.000000               958.000000   
mean         28.721294     28.698017               922.538622   
std          39.918708     39.914269              1925.750178   
min         -18.000000    -18.000000                 0.000000   
25%         -10.000000    -10.000000                31.000000   
50%          16.000000     16.000000               210.500000   
75%          71.000000     71.000000               920.750000   
max          97.000000     97.000000             20744.000000   

       totalurgencycount  urgencytextrate  
count         391.000000       391.000000  
mean            1.209719        65.572890  
std             0.583542        28.476426  
min             1.000000        14.000000  
25%             1.000000        50.000000  
50%             1.000000        50.000000  
75%             1.000000       100.000000  
max             6.000000       100.000000  
In [4]:
import matplotlib.pyplot as plt

# Histogram of units sold per merchant row, using 20 bins.
fig, ax = plt.subplots()
ax.hist(df['totalunitssold'], bins=20, color='skyblue', edgecolor='black')
ax.set_xlabel('Total Units Sold')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Total Units Sold')
plt.show()
No description has been provided for this image
In [5]:
# Units sold (x) against merchant rating (y), one green marker per row.
fig, ax = plt.subplots()
ax.scatter(df['totalunitssold'], df['rating'], color='green')
ax.set(
    xlabel='Total Units Sold',
    ylabel='Rating',
    title='Relationship between Total Units Sold and Rating',
)
plt.show()
No description has been provided for this image
In [6]:
# Rank merchants by the plotted column so the chart really shows the ten
# largest `listedproducts` values; the first ten rows of the file are not
# ordered by this column, so slicing `[:10]` does not give a "top 10".
top_listed = df.nlargest(10, 'listedproducts')

plt.bar(top_listed['merchantid'], top_listed['listedproducts'], color='orange')
plt.xlabel('Merchant ID')
plt.ylabel('Listed Products')
plt.title('Top 10 Merchants by Listed Products')
# Merchant IDs are long hex strings; tilt them so they stay legible.
plt.xticks(rotation=45)
plt.show()
No description has been provided for this image
In [7]:
# Horizontal box plot summarising the spread of mean units sold per product.
fig, ax = plt.subplots()
ax.boxplot(df['meanunitssoldperproduct'], vert=False)
ax.set_xlabel('Mean Units Sold per Product')
ax.set_title('Box Plot of Mean Units Sold per Product')
plt.show()
No description has been provided for this image
In [8]:
# Mean product price drawn against the DataFrame's row index, as a red line.
fig, ax = plt.subplots()
ax.plot(df['meanproductprices'], color='red')
ax.set(
    xlabel='Index',
    ylabel='Mean Product Prices',
    title='Trend of Mean Product Prices',
)
plt.show()
No description has been provided for this image
In [9]:
# Share of listed products among the first five rows of the frame.
# NOTE(review): "Top 5" relies on the row order of the CSV — confirm the file is ranked as intended.
first_five = df.iloc[:5]

fig, ax = plt.subplots()
ax.pie(
    first_five['listedproducts'],
    labels=first_five['merchantid'],
    autopct='%1.1f%%',
    startangle=140,
)
ax.axis('equal')  # equal aspect ratio keeps the pie circular
ax.set_title('Proportion of Listed Products by Top 5 Merchants')
plt.show()
No description has been provided for this image
In [12]:
import plotly.express as px

# `listedproducts` and `totalunitssold` are plain counts, not coordinates.
# A geographic scatter expects latitude/longitude in degrees, so these two
# columns belong on an ordinary Cartesian scatter plot instead of a map.
fig = px.scatter(
    df,
    x='listedproducts',
    y='totalunitssold',
    hover_name='merchantid',  # identify the merchant on hover
    labels={
        'listedproducts': 'Listed Products',
        'totalunitssold': 'Total Units Sold',
    },
    title='Total Units Sold vs. Listed Products',
)

# Show the plot
fig.show()
In [13]:
import pandas as pd
import plotly.graph_objects as go

# Read the CSV file
csv_file_path = r"C:\Users\User\Desktop\Data Analytics\activesellers.csv"
df = pd.read_csv(csv_file_path)

# Create a 3D scatter plot using Plotly: one marker per merchant, positioned by
# listed products (x), total units sold (y) and rating (z).
fig = go.Figure(data=[
    go.Scatter3d(
        x=df['listedproducts'],
        y=df['totalunitssold'],
        z=df['rating'],
        mode='markers',  # You can also use 'lines' mode if you prefer
        marker=dict(
            size=12,
            color=df['rating'],                # Set color based on rating
            colorscale='Viridis',             # Choose a colorscale
            opacity=0.8,
            colorbar=dict(title='Rating')     # Add a colorbar
        ),
        text=df['merchantid'],                # Hover text
        hoverinfo='text'                      # Show only the merchant id on hover
    )
])

# Update layout to add titles and axis labels and set the initial camera.
# The trace is a Scatter3d, so the title says "scatter" rather than "bar".
fig.update_layout(
    title='Interactive 3D Scatter Plot',
    scene=dict(
        xaxis=dict(title='Listed Products'),
        yaxis=dict(title='Total Units Sold'),
        zaxis=dict(title='Rating'),
        camera=dict(
            eye=dict(x=1.2, y=1.2, z=1.2),  # Set initial camera position
            center=dict(x=0, y=0, z=0),      # Set camera center
            up=dict(x=0, y=0, z=1),           # Set up direction
        )
    )
)

# Show the plot
fig.show()
In [14]:
import pandas as pd
import plotly.graph_objects as go

# Read the CSV file
csv_file_path = r"C:\Users\User\Desktop\Data Analytics\activesellers.csv"
df = pd.read_csv(csv_file_path)

# Display the columns in the DataFrame to identify the correct column names
print(df.columns)

# `totalurgencycount` has missing values and many merchants share the same
# count, so joining the raw rows with a line only produces a scribble.
# Average each metric per urgency count instead: groupby drops the NaN keys
# and returns the groups sorted by key, giving one ordered point per x value.
metric_labels = {
    'listedproducts': 'Mean Listed Products',
    'totalunitssold': 'Mean Total Units Sold',
    'rating': 'Mean Rating',
}
urgency_means = (
    df.groupby('totalurgencycount')[list(metric_labels)]
    .mean()
    .reset_index()
)

# Create a line chart using Plotly
fig = go.Figure()

# Add one line trace per metric
for column, label in metric_labels.items():
    fig.add_trace(go.Scatter(
        x=urgency_means['totalurgencycount'],
        y=urgency_means[column],
        mode='lines+markers',
        name=label,
    ))

# Update layout to add titles and axis labels, and enable zooming
fig.update_layout(
    title='Zoomable Line Chart',
    # The x values are urgency counts, not dates; the rangeslider allows zooming on the x-axis
    xaxis=dict(title='Total Urgency Count', rangeslider=dict(visible=True)),
    yaxis=dict(title='Mean Value'),
    hovermode='x'  # Hovering shows every trace's value at the same x position
)

# Show the plot
fig.show()
Index(['merchantid', 'listedproducts', 'totalunitssold',
       'meanunitssoldperproduct', 'rating', 'merchantratingscount',
       'meanproductprices', 'meanretailprices', 'averagediscount',
       'meandiscount', 'meanproductratingscount', 'totalurgencycount',
       'urgencytextrate'],
      dtype='object')
In [15]:
import pandas as pd
import plotly.graph_objects as go

# Read the CSV file
csv_file_path = r"C:\Users\User\Desktop\Data Analytics\activesellers.csv"
df = pd.read_csv(csv_file_path)

# Display the columns in the DataFrame to identify the correct column names
print(df.columns)

# The x values are urgency counts (not merchant ids), and many rows share the
# same count, so plotting raw rows piles many bars onto each x position.
# Plot the mean of each metric per urgency count instead; groupby drops the
# rows whose urgency count is missing and sorts the groups by key.
metric_labels = {
    'listedproducts': 'Mean Listed Products',
    'totalunitssold': 'Mean Total Units Sold',
    'rating': 'Mean Rating',
}
urgency_means = (
    df.groupby('totalurgencycount')[list(metric_labels)]
    .mean()
    .reset_index()
)

# Create a column chart using Plotly
fig = go.Figure()

# Add one bar trace per metric
for column, label in metric_labels.items():
    fig.add_trace(go.Bar(
        x=urgency_means['totalurgencycount'],
        y=urgency_means[column],
        name=label,
    ))

# Update layout to add titles and axis labels
fig.update_layout(
    title='Column Chart',
    xaxis=dict(title='Total Urgency Count'),
    yaxis=dict(title='Mean Value'),
    barmode='group'  # Use 'group' for grouped bars or 'stack' for stacked bars
)

# Show the plot
fig.show()
Index(['merchantid', 'listedproducts', 'totalunitssold',
       'meanunitssoldperproduct', 'rating', 'merchantratingscount',
       'meanproductprices', 'meanretailprices', 'averagediscount',
       'meandiscount', 'meanproductratingscount', 'totalurgencycount',
       'urgencytextrate'],
      dtype='object')
In [16]:
import pandas as pd
import plotly.graph_objects as go

# Load the sellers dataset
csv_file_path = r"C:\Users\User\Desktop\Data Analytics\activesellers.csv"
df = pd.read_csv(csv_file_path)

# Echo the available column names
print(df.columns)

# One tile per row, labelled with the merchant id and sized by the number of
# listed products. Every tile has an empty parent, i.e. hangs off the root.
root_parents = ["" for _ in range(len(df))]
merchant_tiles = go.Treemap(
    labels=df['merchantid'],
    parents=root_parents,
    values=df['listedproducts'],
    textinfo="label+value",  # text drawn on each tile: its label and its value
)
fig = go.Figure(data=[merchant_tiles])

# Give the figure a title
fig.update_layout(title='Treemap Chart')

# Render the figure
fig.show()
Index(['merchantid', 'listedproducts', 'totalunitssold',
       'meanunitssoldperproduct', 'rating', 'merchantratingscount',
       'meanproductprices', 'meanretailprices', 'averagediscount',
       'meandiscount', 'meanproductratingscount', 'totalurgencycount',
       'urgencytextrate'],
      dtype='object')
In [17]:
import pandas as pd
import plotly.graph_objects as go

# Load the sellers dataset
csv_file_path = r"C:\Users\User\Desktop\Data Analytics\activesellers.csv"
df = pd.read_csv(csv_file_path)

# Order merchants from most to fewest units sold and attach each row's
# running share of all units sold, expressed as a percentage.
df_sorted = df.sort_values(by='totalunitssold', ascending=False).assign(
    cumulative_percentage=lambda ranked: (
        ranked['totalunitssold'].cumsum() / ranked['totalunitssold'].sum()
    ) * 100
)

# Bars: units sold per merchant (primary y-axis)
units_bars = go.Bar(
    x=df_sorted['merchantid'],
    y=df_sorted['totalunitssold'],
    name='Total Units Sold',
    marker=dict(color='blue'),
)

# Line: cumulative percentage, bound to the secondary y-axis
cumulative_line = go.Scatter(
    x=df_sorted['merchantid'],
    y=df_sorted['cumulative_percentage'],
    name='Cumulative Percentage',
    yaxis='y2',
    line=dict(color='red', width=4),
)

# Assemble the Pareto chart from both traces
fig = go.Figure(data=[units_bars, cumulative_line])

# Titles and axes: the secondary axis overlays the primary one on the right
# and is fixed to the 0-100 range.
fig.update_layout(
    title='Pareto Chart',
    xaxis=dict(title='Merchant ID'),
    yaxis=dict(title='Total Units Sold', side='left', color='blue'),
    yaxis2=dict(
        title='Cumulative Percentage',
        overlaying='y',
        side='right',
        color='red',
        range=[0, 100],
    ),
)

# Render the figure
fig.show()
In [18]:
import pandas as pd
import plotly.graph_objects as go

# Sample data: rows without an amount ('Net Sales', 'Profit') are subtotal rows
data = {
    'category': ['Starting', 'Sales', 'Refunds', 'Net Sales', 'Expenses', 'Profit'],
    'amount': [100000, -30000, 5000, None, -20000, None]
}

# Keep the sample in its own frame so the sellers DataFrame `df` loaded
# earlier in the notebook is not overwritten by this demo.
waterfall_df = pd.DataFrame(data)

# go.Waterfall accumulates the bars itself, so it must receive the individual
# changes, not a pre-computed cumulative sum (which would be summed again).
# `measure` tells it how to treat each row:
#   'absolute' - sets the starting level,
#   'relative' - a change added to the running total,
#   'total'    - a bar showing the running total so far (its y value is ignored).
is_total = waterfall_df['amount'].isna()
waterfall_df['measure'] = is_total.map({True: 'total', False: 'relative'})
waterfall_df.loc[0, 'measure'] = 'absolute'

# Create the waterfall chart
fig = go.Figure(go.Waterfall(
    name='20', orientation='v',
    measure=waterfall_df['measure'],
    x=waterfall_df['category'],
    textposition='outside',
    text=waterfall_df['category'],
    y=waterfall_df['amount'].fillna(0),  # placeholder 0 for the subtotal rows
    connector={'line': {'color': 'rgb(63, 63, 63)'}},
    decreasing={'marker': {'color': 'red'}},
    increasing={'marker': {'color': 'green'}},
    totals={'marker': {'color': 'blue', 'line': {'color': 'blue', 'width': 3}}},
))

# Update layout
fig.update_layout(
    title='Waterfall Chart',
    showlegend=False
)

# Show the plot
fig.show()
In [26]:
import pandas as pd
import plotly.graph_objects as go
In [27]:
import pandas as pd

# Where the active-sellers CSV lives.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
csv_file_path = r"C:\Users\User\Desktop\Data Analytics\activesellers.csv"

# Read the file again and rebind `df` to the freshly loaded DataFrame.
df = pd.read_csv(filepath_or_buffer=csv_file_path)
In [28]:
# Preview of the reloaded data: first rows, column dtypes / non-null counts, summary statistics.
print(df.head())
# DataFrame.info() prints its own report and returns None; calling it bare
# avoids the extra "None" line that print(df.info()) emits.
df.info()
print(df.describe())
                 merchantid  listedproducts  totalunitssold  \
0  5357bcf2bb72c5504882e889               2          120000   
1  5708773c3c02161b3f8c7900               5          107100   
2  5417aada4ad3ab27e954b76c               2          100007   
3  570f3a713a698c14278bb51e               1          100000   
4  53082ea15aefb07dfe1f2a4f               1          100000   

   meanunitssoldperproduct  rating  merchantratingscount  meanproductprices  \
0                  60000.0   4.219              320031.0               9.00   
1                  21420.0   3.934              139223.0               7.76   
2                  50004.0   4.053              108048.0               8.00   
3                 100000.0   3.889               19248.0               5.67   
4                 100000.0   4.036              366898.0               5.00   

   meanretailprices  averagediscount  meandiscount  meanproductratingscount  \
0              20.0             54.0          54.0                   8836.0   
1              34.2             61.0          61.0                   4010.0   
2               8.0             -1.0          -1.0                   5531.0   
3              19.0             71.0          71.0                  18393.0   
4              33.0             85.0          85.0                  13789.0   

   totalurgencycount  urgencytextrate  
0                1.0             50.0  
1                3.0             60.0  
2                NaN              NaN  
3                1.0            100.0  
4                NaN              NaN  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 958 entries, 0 to 957
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   merchantid               958 non-null    object 
 1   listedproducts           958 non-null    int64  
 2   totalunitssold           958 non-null    int64  
 3   meanunitssoldperproduct  958 non-null    float64
 4   rating                   958 non-null    float64
 5   merchantratingscount     958 non-null    float64
 6   meanproductprices        958 non-null    float64
 7   meanretailprices         958 non-null    float64
 8   averagediscount          958 non-null    float64
 9   meandiscount             958 non-null    float64
 10  meanproductratingscount  958 non-null    float64
 11  totalurgencycount        391 non-null    float64
 12  urgencytextrate          391 non-null    float64
dtypes: float64(10), int64(2), object(1)
memory usage: 97.4+ KB
None
       listedproducts  totalunitssold  meanunitssoldperproduct      rating  \
count      958.000000      958.000000               958.000000  958.000000   
mean         1.641962     7124.483299              4407.605428    4.043994   
std          1.246183    14363.521893              9167.077812    0.222137   
min          1.000000        1.000000                 1.000000    2.333000   
25%          1.000000      100.000000               100.000000    3.931000   
50%          1.000000     1000.000000              1000.000000    4.055500   
75%          2.000000    10000.000000              5000.000000    4.190000   
max         15.000000   120000.000000            100000.000000    5.000000   

       merchantratingscount  meanproductprices  meanretailprices  \
count          9.580000e+02         958.000000        958.000000   
mean           2.202045e+04           8.634906         24.802265   
std            8.473232e+04           3.987599         30.258719   
min            0.000000e+00           1.000000          1.000000   
25%            1.373000e+03           6.000000          7.000000   
50%            5.990500e+03           8.000000         11.000000   
75%            1.723850e+04          11.000000         28.875000   
max            2.174765e+06          49.000000        252.000000   

       averagediscount  meandiscount  meanproductratingscount  \
count       958.000000    958.000000               958.000000   
mean         28.721294     28.698017               922.538622   
std          39.918708     39.914269              1925.750178   
min         -18.000000    -18.000000                 0.000000   
25%         -10.000000    -10.000000                31.000000   
50%          16.000000     16.000000               210.500000   
75%          71.000000     71.000000               920.750000   
max          97.000000     97.000000             20744.000000   

       totalurgencycount  urgencytextrate  
count         391.000000       391.000000  
mean            1.209719        65.572890  
std             0.583542        28.476426  
min             1.000000        14.000000  
25%             1.000000        50.000000  
50%             1.000000        50.000000  
75%             1.000000       100.000000  
max             6.000000       100.000000  
In [29]:
# seaborn is used here but is otherwise first imported in a later cell, so on
# a fresh kernel run top-to-bottom this cell would fail with a NameError.
import seaborn as sns

# Distribution of units sold for each distinct urgency count.
plt.figure(figsize=(10, 6))
sns.boxplot(x='totalurgencycount', y='totalunitssold', data=df)
plt.xlabel('Total Urgency Count')
plt.ylabel('Total Units Sold')
plt.title('Boxplot of Total Units Sold Grouped by Total Urgency Count')
plt.show()
No description has been provided for this image
In [30]:
# Violin plot of units sold, one violin per distinct urgency count.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(data=df, x='totalurgencycount', y='totalunitssold', ax=ax)
ax.set_xlabel('Total Urgency Count')
ax.set_ylabel('Total Units Sold')
ax.set_title('Violin Plot of Total Units Sold Grouped by Total Urgency Count')
plt.show()
No description has been provided for this image
In [31]:
# pairplot builds a whole grid of subplots. plt.title() would label only one
# of them, so put the title on the figure itself and nudge it above the grid.
pair_grid = sns.pairplot(df[['listedproducts', 'totalunitssold', 'meanunitssoldperproduct', 'rating']])
pair_grid.figure.suptitle('Pairplot of Selected Numerical Variables', y=1.02)
plt.show()
No description has been provided for this image
In [32]:
import seaborn as sns
import matplotlib.pyplot as plt

# Restrict to numeric columns (this leaves out the string merchant id)
numeric_df = df.select_dtypes(include='number')

# Pairwise correlations between the numeric columns
correlations = numeric_df.corr()

# Annotated heatmap of the correlation matrix
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlations, annot=True, cmap='coolwarm', linewidths=0.5, ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()
No description has been provided for this image
In [ ]: